Task 1 a

Read the data from the RDS files, keep only unique rows, restrict to the selected bet types (“1x2”, “bts”, “ha”, “ou”, “dc”), and work only with the initial odds.

# Load the match table and the odds history.
# NOTE(review): both paths are absolute and machine-specific; adjust them
# before running on another computer.
# The match table is coerced to a data.table explicitly so that the
# single-argument subset below is guaranteed to filter ROWS (on a plain
# data.frame the same syntax would select columns).
matchdata <- as.data.table(
  readRDS('C:\\Users\\10025\\Desktop\\IE582\\hw2/df9b1196-e3cf-4cc7-9159-f236fe738215_matches.rds')
)
odddata <- data.table(
  readRDS('C:\\Users\\10025\\Desktop\\IE582\\hw2/df9b1196-e3cf-4cc7-9159-f236fe738215_odd_details.rds')
)

# Keep only matches without any missing field, then drop duplicated rows.
matchdata <- matchdata[complete.cases(matchdata)]
matchdata <- unique(matchdata)

# Restrict the odds to the bet types used in the analysis.
odddata <- odddata[betType %in% c("1x2", "bts", "ha", "ou", "dc")]

# Sort by date inside each match / odd type / bookmaker so that the first
# row of every group below is the earliest quoted odd.
odddata <- odddata[order(matchId, oddtype, bookmaker, date)]

# Initial odd = first (earliest) odd of each
# match / odd type / bookmaker / handicap combination.
odddata_initial <- odddata[
  ,
  list(start_odd = odd[1L]),
  by = list(matchId, oddtype, bookmaker, totalhandicap)
]

Select 5 bookmakers and combine them into one dataset

# Cast one bookmaker's initial odds to wide form: one row per matchId and
# one column per oddtype/totalhandicap combination, filled with start_odd.
widen_initial_odds <- function(bookmaker_odds) {
  dcast(
    bookmaker_odds,
    matchId ~ oddtype + totalhandicap,
    value.var = 'start_odd'
  )
}

# Slice the initial odds per bookmaker and widen each slice.
odddata_initial_10Bet <- odddata_initial[bookmaker == "10Bet"]
wide_odds_initial_10Bet <- widen_initial_odds(odddata_initial_10Bet)

odddata_initial_bet365 <- odddata_initial[bookmaker == "bet365"]
wide_odds_initial_bet365 <- widen_initial_odds(odddata_initial_bet365)

odddata_initial_BetfairExchange <- odddata_initial[bookmaker == "Betfair Exchange"]
wide_odds_initial_BetfairExchange <- widen_initial_odds(odddata_initial_BetfairExchange)

odddata_initial_bwin <- odddata_initial[bookmaker == "bwin"]
wide_odds_initial_bwin <- widen_initial_odds(odddata_initial_bwin)

odddata_initial_Betsson <- odddata_initial[bookmaker == "Betsson"]
wide_odds_initial_Betsson <- widen_initial_odds(odddata_initial_Betsson)

# Stack the five wide tables; fill = TRUE pads columns that a bookmaker
# does not offer with NA.
m <- rbind(
  wide_odds_initial_10Bet,
  wide_odds_initial_bet365,
  wide_odds_initial_BetfairExchange,
  wide_odds_initial_bwin,
  wide_odds_initial_Betsson,
  fill = TRUE
)

For the combined dataset (all 5 bookmakers)

# Join the match information with the stacked wide odds of all bookmakers.
z <- merge(x = matchdata, y = m, by = "matchId", all = TRUE, allow.cartesian = TRUE)

# Split the score string on ":" into score1 / score2 and make them numeric.
# Columns are addressed by NAME rather than by position, so the code keeps
# working if the number of odds columns produced by dcast() changes.
z <- setDT(z)[, paste0("score", 1:2) := tstrsplit(score, ":")]
z[, score1 := as.numeric(score1)]
z[, score2 := as.numeric(score2)]
z[, totalscore := score1 + score2]

# Over/under 2.5 outcome used to colour the PCA plots:
# 1 = fewer than 2.5 total goals, 2 = otherwise.
z[, ou := ifelse(totalscore < 2.5, 1, 2)]

# Drop the first seven columns (the match-level columns that precede the
# odds after the merge).
data <- z[, -c(1:7)]

# Keep rows that have an initial over-2.5 odd.
data_ou25 <- data[!is.na(over_2.5)]

# Remove every over/under line listed below (only the lines not listed,
# e.g. 1.5, 2.5 and 3.5, remain). `:=` deletes by reference, so
# `data_ou25` and `asdas` refer to the same table.
dropped_lines <- c(
  "0.5", "0.75", "1", "1.75", "10", "10.5", "11", "11.5", "2", "2.25",
  "2.29", "2.75", "3", "3.25", "3.75", "4", "4.25", "4.5", "5", "5.25",
  "5.5", "6.5", "7.5", "8.5"
)
drop_cols <- c(paste0("over_", dropped_lines), paste0("under_", dropped_lines))
asdas <- data_ou25[, (drop_cols) := NULL]

# PCA needs complete rows.
pat <- asdas[complete.cases(asdas)]

# Separate the odds (PCA input) from the outcome columns.
outcome_cols <- c("score1", "score2", "totalscore", "ou")
dat <- pat[, setdiff(names(pat), outcome_cols), with = FALSE]
ress <- pat[, "ou", with = FALSE]
a <- as.numeric(ress[["ou"]])

# Unscaled PCA on the odds.
pc <- princomp(dat)
plot(pc)

summary(pc, loadings = TRUE)
## Importance of components:
##                           Comp.1    Comp.2     Comp.3    Comp.4     Comp.5
## Standard deviation     4.5147411 2.1823122 1.41829685 0.8848934 0.75133113
## Proportion of Variance 0.7046297 0.1646376 0.06953917 0.0270693 0.01951452
## Cumulative Proportion  0.7046297 0.8692672 0.93880640 0.9658757 0.98539023
##                             Comp.6      Comp.7      Comp.8      Comp.9
## Standard deviation     0.493618466 0.261880530 0.193129893 0.182370733
## Proportion of Variance 0.008423217 0.002370837 0.001289419 0.001149756
## Cumulative Proportion  0.993813450 0.996184287 0.997473706 0.998623462
##                             Comp.10      Comp.11      Comp.12      Comp.13
## Standard deviation     0.1151012584 0.1067004306 0.0784575025 0.0748793518
## Proportion of Variance 0.0004579893 0.0003935751 0.0002127964 0.0001938293
## Cumulative Proportion  0.9990814509 0.9994750260 0.9996878223 0.9998816516
##                             Comp.14      Comp.15      Comp.16
## Standard deviation     4.433805e-02 3.203779e-02 2.076518e-02
## Proportion of Variance 6.795922e-05 3.548301e-05 1.490619e-05
## Cumulative Proportion  9.999496e-01 9.999851e-01 1.000000e+00
## 
## Loadings:
##           Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8 Comp.9
## 1_NA       0.157  0.410  0.319  0.264  0.198  0.672  0.189              
## 12_NA                                                                   
## 1X_NA             0.164  0.113                0.212                     
## 2_NA      -0.557 -0.123  0.651 -0.422                       0.130  0.221
## NO_NA                                 -0.212                0.764 -0.254
## X2_NA     -0.204         0.136                       0.264 -0.412 -0.791
## YES_NA                         -0.136  0.125  0.111         0.383 -0.444
## odd1_NA    0.197  0.662  0.239 -0.156  0.204 -0.456 -0.413        -0.115
## odd2_NA   -0.723  0.215 -0.351  0.393  0.218        -0.314              
## oddX_NA   -0.198  0.414 -0.168               -0.336  0.769  0.132  0.184
## over_1.5                                                                
## over_2.5                -0.164 -0.236  0.135  0.104                     
## over_3.5                -0.425 -0.642  0.379  0.287        -0.132       
## under_1.5 -0.119  0.329 -0.113 -0.232 -0.751  0.249 -0.157 -0.199       
## under_2.5         0.114               -0.251                            
## under_3.5                             -0.109                            
##           Comp.10 Comp.11 Comp.12 Comp.13 Comp.14 Comp.15 Comp.16
## 1_NA                       0.185   0.251                         
## 12_NA              0.171  -0.337           0.914                 
## 1X_NA                     -0.580  -0.692  -0.272                 
## 2_NA                                                             
## NO_NA      0.286   0.417                  -0.118                 
## X2_NA      0.121   0.207                                         
## YES_NA    -0.504  -0.566          -0.116   0.104                 
## odd1_NA                                                          
## odd2_NA                                                          
## oddX_NA                                                          
## over_1.5                                           0.273  -0.952 
## over_2.5   0.287  -0.326  -0.585   0.551  -0.162           0.140 
## over_3.5           0.226   0.236  -0.197                         
## under_1.5 -0.310                                                 
## under_2.5  0.665  -0.515   0.287  -0.244   0.157  -0.183         
## under_3.5  0.119           0.142  -0.121           0.931   0.252
# Get principal component vectors using prcomp instead of princomp.
# The scaling argument of prcomp() is spelled `scale.`; writing `scale`
# only works through partial argument matching, so it is spelled out here.
pc <- prcomp(dat, center = TRUE, scale. = TRUE)

# First 7 principal components
comp <- data.frame(pc$x[, 1:7])
# Pairwise scatter plots of the scores, coloured by the outcome code in `a`
plot(comp, pch = 16, col = a, main = "Graph of Different PCs in 2-D")

# Coordinates of the individuals on the principal components
ind <- get_pca_ind(pc)
f <- ind$coord

## eigenvalues
eig.val <- get_eigenvalue(pc)
eig.val
##        eigenvalue variance.percent cumulative.variance.percent
## Dim.1  5.81520173      36.34501083                    36.34501
## Dim.2  4.26233605      26.63960033                    62.98461
## Dim.3  3.38070808      21.12942550                    84.11404
## Dim.4  1.21447177       7.59044858                    91.70449
## Dim.5  0.37108323       2.31927021                    94.02376
## Dim.6  0.32281186       2.01757413                    96.04133
## Dim.7  0.22805859       1.42536620                    97.46670
## Dim.8  0.13588317       0.84926984                    98.31597
## Dim.9  0.07785377       0.48658606                    98.80255
## Dim.10 0.05470026       0.34187664                    99.14443
## Dim.11 0.03795848       0.23724050                    99.38167
## Dim.12 0.02725929       0.17037057                    99.55204
## Dim.13 0.02272841       0.14205258                    99.69409
## Dim.14 0.02086661       0.13041631                    99.82451
## Dim.15 0.01632796       0.10204978                    99.92656
## Dim.16 0.01175071       0.07344194                   100.00000
# Eigenvalue plot of the PCA
fviz_eig(pc)

## graph of individuals
# Points are coloured by the outcome code stored in `a` (not by cos2).
fviz_pca_ind(
  pc,
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  col.ind = a
)

## graph of variables
fviz_pca_var(
  pc,
  repel = TRUE, # avoid text overlapping
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  col.var = "contrib" # colour by contributions to the PC
)

## graph combined
fviz_pca_biplot(
  pc,
  col.ind = a,
  col.var = "#2E9FDF" # variables colour
)

For a single bookmaker only (10Bet)

# Join the match information with the wide initial odds of 10Bet only.
z1 <- merge(x = matchdata, y = wide_odds_initial_10Bet, by = "matchId", all = TRUE, allow.cartesian = TRUE)

# Split the score string on ":" into score1 / score2 and make them numeric.
# Columns are addressed by NAME rather than by position, so the code does
# not depend on how many odds columns this bookmaker happens to have.
z1 <- setDT(z1)[, paste0("score", 1:2) := tstrsplit(score, ":")]
z1[, score1 := as.numeric(score1)]
z1[, score2 := as.numeric(score2)]
z1[, totalscore := score1 + score2]

# Over/under 2.5 outcome used to colour the PCA plots:
# 1 = fewer than 2.5 total goals, 2 = otherwise.
z1[, ou := ifelse(totalscore < 2.5, 1, 2)]

# Drop the first seven columns (the match-level columns that precede the
# odds after the merge).
data1 <- z1[, -c(1:7)]

# Keep rows that have an initial over-2.5 odd.
data1_ou25 <- data1[!is.na(over_2.5)]

# Remove every over/under line listed below. `:=` deletes by reference, so
# `data1_ou25` and `asdas1` refer to the same table.
dropped_lines1 <- c(
  "0.5", "0.75", "1", "1.75", "10", "10.5", "11", "11.5", "2", "2.25",
  "2.29", "2.75", "3", "3.25", "3.75", "4", "4.25", "4.5", "5", "5.25",
  "5.5", "6.5", "7.5"
)
drop_cols1 <- c(paste0("over_", dropped_lines1), paste0("under_", dropped_lines1))
asdas1 <- data1_ou25[, (drop_cols1) := NULL]

# PCA needs complete rows.
pat1 <- asdas1[complete.cases(asdas1)]

# Separate the odds (PCA input) from the outcome columns.
outcome_cols1 <- c("score1", "score2", "totalscore", "ou")
dat1 <- pat1[, setdiff(names(pat1), outcome_cols1), with = FALSE]
ress1 <- pat1[, "ou", with = FALSE]
a1 <- as.numeric(ress1[["ou"]])

# Unscaled PCA on the odds.
pc1 <- princomp(dat1)
plot(pc1)

summary(pc1, loadings = TRUE)
## Importance of components:
##                          Comp.1    Comp.2     Comp.3      Comp.4
## Standard deviation     5.100364 2.2335837 0.79667758 0.415445890
## Proportion of Variance 0.813601 0.1560320 0.01985064 0.005398066
## Cumulative Proportion  0.813601 0.9696331 0.98948369 0.994881756
##                             Comp.5      Comp.6       Comp.7       Comp.8
## Standard deviation     0.253697098 0.207286900 0.1771706641 0.1028758624
## Proportion of Variance 0.002012984 0.001343857 0.0009817318 0.0003310063
## Cumulative Proportion  0.996894740 0.998238596 0.9992203280 0.9995513343
##                             Comp.9     Comp.10      Comp.11      Comp.12
## Standard deviation     0.081885891 0.063207507 4.316411e-02 2.679655e-02
## Proportion of Variance 0.000209714 0.000124953 5.827132e-05 2.245778e-05
## Cumulative Proportion  0.999761048 0.999886001 9.999443e-01 9.999667e-01
##                             Comp.13      Comp.14      Comp.15      Comp.16
## Standard deviation     2.310965e-02 1.994775e-02 9.955158e-03 5.716233e-03
## Proportion of Variance 1.670306e-05 1.244506e-05 3.099599e-06 1.021949e-06
## Cumulative Proportion  9.999834e-01 9.999959e-01 9.999990e-01 1.000000e+00
## 
## Loadings:
##           Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8 Comp.9
## 1_NA       0.149  0.453  0.130                0.787  0.225              
## 12_NA                                                              0.160
## 1X_NA             0.196                       0.133        -0.111  0.343
## 2_NA      -0.473               -0.819 -0.171               -0.235       
## NO_NA                   -0.181        -0.114               -0.178  0.643
## X2_NA     -0.194               -0.181  0.149                0.898  0.295
## YES_NA                   0.178                              0.170 -0.527
## odd1_NA    0.243  0.722  0.270        -0.279 -0.495 -0.103              
## odd2_NA   -0.782  0.154  0.198  0.534 -0.194                            
## oddX_NA   -0.172  0.313                0.870 -0.112 -0.215 -0.214       
## over_1.5                                                                
## over_2.5                 0.223                       0.200              
## over_3.5         -0.194  0.579         0.222 -0.195  0.631              
## under_1.5         0.225 -0.588               -0.152  0.602        -0.245
## under_2.5               -0.223                       0.225              
## under_3.5               -0.101                       0.125              
##           Comp.10 Comp.11 Comp.12 Comp.13 Comp.14 Comp.15 Comp.16
## 1_NA       0.258                                                 
## 12_NA              0.183   0.366   0.793   0.389  -0.142         
## 1X_NA     -0.846  -0.284                                         
## 2_NA                                                             
## NO_NA              0.650          -0.227                         
## X2_NA                                                            
## YES_NA    -0.439   0.659                                         
## odd1_NA                                                          
## odd2_NA                                                          
## oddX_NA                                                          
## over_1.5                                           0.495  -0.856 
## over_2.5                   0.307   0.251  -0.856                 
## over_3.5                  -0.142  -0.152   0.265                 
## under_1.5                 -0.311   0.183                         
## under_2.5         -0.116   0.778  -0.432   0.140  -0.206  -0.117 
## under_3.5                  0.197                   0.821   0.490
# Get principal component vectors using prcomp instead of princomp.
# The scaling argument of prcomp() is spelled `scale.`; writing `scale`
# only works through partial argument matching, so it is spelled out here.
pc1 <- prcomp(dat1, center = TRUE, scale. = TRUE)

# First 7 principal components
comp1 <- data.frame(pc1$x[, 1:7])
# Pairwise scatter plots of the scores, coloured by the outcome code in `a1`
plot(comp1, pch = 16, col = a1, main = "Graph of Different PCs in 2-D")

# Coordinates of the individuals on the principal components
ind1 <- get_pca_ind(pc1)
f1 <- ind1$coord

## eigenvalues
eig.val1 <- get_eigenvalue(pc1)
eig.val1
##         eigenvalue variance.percent cumulative.variance.percent
## Dim.1  8.428183983     52.676149892                    52.67615
## Dim.2  4.836710913     30.229443204                    82.90559
## Dim.3  2.197274492     13.732965577                    96.63856
## Dim.4  0.215728471      1.348302944                    97.98686
## Dim.5  0.107191178      0.669944864                    98.65681
## Dim.6  0.068191267      0.426195417                    99.08300
## Dim.7  0.048488311      0.303051943                    99.38605
## Dim.8  0.032060096      0.200375602                    99.58643
## Dim.9  0.019873122      0.124207013                    99.71064
## Dim.10 0.015732548      0.098328428                    99.80896
## Dim.11 0.008413168      0.052582299                    99.86155
## Dim.12 0.007891985      0.049324905                    99.91087
## Dim.13 0.005736325      0.035852032                    99.94672
## Dim.14 0.003787905      0.023674405                    99.97040
## Dim.15 0.003139243      0.019620267                    99.99002
## Dim.16 0.001596993      0.009981207                   100.00000
# Eigenvalue plot of the PCA
fviz_eig(pc1)

## graph of individuals
# Points are coloured by the outcome code stored in `a1` (not by cos2).
fviz_pca_ind(
  pc1,
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  col.ind = a1
)

## graph of variables
fviz_pca_var(
  pc1,
  repel = TRUE, # avoid text overlapping
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  col.var = "contrib" # colour by contributions to the PC
)

## graph combined
fviz_pca_biplot(
  pc1,
  col.ind = a1,
  col.var = "#2E9FDF" # variables colour
)

##Task 2

z[, totalscore := score1 + score2]

# Match-result code stored in the `ou` column for this task:
# "1" = equal scores, "2" = score1 > score2, "3" = score1 < score2.
z[, ou := ifelse(score1 == score2, "1", ifelse(score1 > score2, "2", "3"))]

# Work on an independent copy and make the result code numeric. The column
# is addressed by name instead of by position (formerly column 75).
z1x2 <- copy(z)
z1x2[, ou := as.numeric(ou)]

# Drop the first seven columns (the match-level columns that precede the
# odds after the merge).
data1x2 <- z1x2[, -c(1:7)]

# Keep rows that have an initial home-win odd. The filter is evaluated on
# `data1x2` itself; it previously read the column from the unrelated
# Task 1 table `data`, which only worked while both tables happened to
# have identical row order.
data_1x2 <- data1x2[!is.na(odd1_NA)]

# Remove every over/under line listed below. `:=` deletes by reference, so
# `data_1x2` and `asdas1x2` refer to the same table.
dropped_lines1x2 <- c(
  "0.5", "0.75", "1", "1.75", "10", "10.5", "11", "11.5", "2", "2.25",
  "2.29", "2.75", "3", "3.25", "3.75", "4", "4.25", "4.5", "5", "5.25",
  "5.5", "6.5", "7.5", "8.5"
)
drop_cols1x2 <- c(
  paste0("over_", dropped_lines1x2),
  paste0("under_", dropped_lines1x2)
)
asdas1x2 <- data_1x2[, (drop_cols1x2) := NULL]

# PCA needs complete rows.
pat1x2 <- asdas1x2[complete.cases(asdas1x2)]

# Separate the odds (PCA input) from the outcome columns.
outcome_cols1x2 <- c("score1", "score2", "totalscore", "ou")
dat1x2 <- pat1x2[, setdiff(names(pat1x2), outcome_cols1x2), with = FALSE]
ress1x2 <- pat1x2[, "ou", with = FALSE]
a1x2 <- as.numeric(ress1x2[["ou"]])

# Unscaled PCA on the odds.
pc1x2 <- princomp(dat1x2)
plot(pc1x2)

summary(pc1x2, loadings = TRUE)
## Importance of components:
##                           Comp.1    Comp.2     Comp.3    Comp.4     Comp.5
## Standard deviation     4.5147411 2.1823122 1.41829685 0.8848934 0.75133113
## Proportion of Variance 0.7046297 0.1646376 0.06953917 0.0270693 0.01951452
## Cumulative Proportion  0.7046297 0.8692672 0.93880640 0.9658757 0.98539023
##                             Comp.6      Comp.7      Comp.8      Comp.9
## Standard deviation     0.493618466 0.261880530 0.193129893 0.182370733
## Proportion of Variance 0.008423217 0.002370837 0.001289419 0.001149756
## Cumulative Proportion  0.993813450 0.996184287 0.997473706 0.998623462
##                             Comp.10      Comp.11      Comp.12      Comp.13
## Standard deviation     0.1151012584 0.1067004306 0.0784575025 0.0748793518
## Proportion of Variance 0.0004579893 0.0003935751 0.0002127964 0.0001938293
## Cumulative Proportion  0.9990814509 0.9994750260 0.9996878223 0.9998816516
##                             Comp.14      Comp.15      Comp.16
## Standard deviation     4.433805e-02 3.203779e-02 2.076518e-02
## Proportion of Variance 6.795922e-05 3.548301e-05 1.490619e-05
## Cumulative Proportion  9.999496e-01 9.999851e-01 1.000000e+00
## 
## Loadings:
##           Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8 Comp.9
## 1_NA       0.157  0.410  0.319  0.264  0.198  0.672  0.189              
## 12_NA                                                                   
## 1X_NA             0.164  0.113                0.212                     
## 2_NA      -0.557 -0.123  0.651 -0.422                       0.130  0.221
## NO_NA                                 -0.212                0.764 -0.254
## X2_NA     -0.204         0.136                       0.264 -0.412 -0.791
## YES_NA                         -0.136  0.125  0.111         0.383 -0.444
## odd1_NA    0.197  0.662  0.239 -0.156  0.204 -0.456 -0.413        -0.115
## odd2_NA   -0.723  0.215 -0.351  0.393  0.218        -0.314              
## oddX_NA   -0.198  0.414 -0.168               -0.336  0.769  0.132  0.184
## over_1.5                                                                
## over_2.5                -0.164 -0.236  0.135  0.104                     
## over_3.5                -0.425 -0.642  0.379  0.287        -0.132       
## under_1.5 -0.119  0.329 -0.113 -0.232 -0.751  0.249 -0.157 -0.199       
## under_2.5         0.114               -0.251                            
## under_3.5                             -0.109                            
##           Comp.10 Comp.11 Comp.12 Comp.13 Comp.14 Comp.15 Comp.16
## 1_NA                       0.185   0.251                         
## 12_NA              0.171  -0.337           0.914                 
## 1X_NA                     -0.580  -0.692  -0.272                 
## 2_NA                                                             
## NO_NA      0.286   0.417                  -0.118                 
## X2_NA      0.121   0.207                                         
## YES_NA    -0.504  -0.566          -0.116   0.104                 
## odd1_NA                                                          
## odd2_NA                                                          
## oddX_NA                                                          
## over_1.5                                           0.273  -0.952 
## over_2.5   0.287  -0.326  -0.585   0.551  -0.162           0.140 
## over_3.5           0.226   0.236  -0.197                         
## under_1.5 -0.310                                                 
## under_2.5  0.665  -0.515   0.287  -0.244   0.157  -0.183         
## under_3.5  0.119           0.142  -0.121           0.931   0.252
# Get principal component vectors using prcomp instead of princomp.
# The scaling argument of prcomp() is spelled `scale.`; writing `scale`
# only works through partial argument matching, so it is spelled out here.
pc1x2 <- prcomp(dat1x2, center = TRUE, scale. = TRUE)

# First 7 principal components
comp1x2 <- data.frame(pc1x2$x[, 1:7])
# Pairwise scatter plots of the scores, coloured by the result code in `a1x2`
plot(comp1x2, pch = 16, col = a1x2, main = "Graph of Different PCs in 2-D")

# Coordinates of the individuals on the principal components
ind1x2 <- get_pca_ind(pc1x2)
f1x2 <- ind1x2$coord

## eigenvalues
eig.val1x2 <- get_eigenvalue(pc1x2)
eig.val1x2
##        eigenvalue variance.percent cumulative.variance.percent
## Dim.1  5.81520173      36.34501083                    36.34501
## Dim.2  4.26233605      26.63960033                    62.98461
## Dim.3  3.38070808      21.12942550                    84.11404
## Dim.4  1.21447177       7.59044858                    91.70449
## Dim.5  0.37108323       2.31927021                    94.02376
## Dim.6  0.32281186       2.01757413                    96.04133
## Dim.7  0.22805859       1.42536620                    97.46670
## Dim.8  0.13588317       0.84926984                    98.31597
## Dim.9  0.07785377       0.48658606                    98.80255
## Dim.10 0.05470026       0.34187664                    99.14443
## Dim.11 0.03795848       0.23724050                    99.38167
## Dim.12 0.02725929       0.17037057                    99.55204
## Dim.13 0.02272841       0.14205258                    99.69409
## Dim.14 0.02086661       0.13041631                    99.82451
## Dim.15 0.01632796       0.10204978                    99.92656
## Dim.16 0.01175071       0.07344194                   100.00000
# Eigenvalue plot of the PCA
fviz_eig(pc1x2)

## graph of individuals
# Points are coloured by the result code stored in `a1x2` (not by cos2).
fviz_pca_ind(
  pc1x2,
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  col.ind = a1x2
)

## graph of variables
fviz_pca_var(
  pc1x2,
  repel = TRUE, # avoid text overlapping
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  col.var = "contrib" # colour by contributions to the PC
)

## graph combined
fviz_pca_biplot(
  pc1x2,
  col.ind = a1x2,
  col.var = "#2E9FDF" # variables colour
)

###Task 3

# Read the JPEG into a numeric array (rows x columns x colour channel).
rawimage <- readJPEG('C:\\Users\\10025\\Desktop\\IE582\\hw2/hw2_image1.jpeg')

str(rawimage)
##  num [1:512, 1:512, 1:3] 0.122 0.141 0.141 0.192 0.114 ...
# Open an empty 512 x 512 canvas and paint the image onto it.
plot(x = c(0, 512), y = c(0, 512), type = "n", xlab = "pixels", ylab = "pixels")
rasterImage(
  rawimage,
  xleft = 0, ybottom = 0, xright = 512, ytop = 512,
  angle = 0, interpolate = TRUE
)

# One matrix per colour channel.
red <- rawimage[, , 1]
green <- rawimage[, , 2]
blue <- rawimage[, , 3]

# Show the three channels side by side. image() is fed the transpose of the
# row-reversed channel matrix.
par(mfrow = c(1, 3))
image(t(red[rev(seq_len(nrow(red))), ]), main = "RED")
image(t(green[rev(seq_len(nrow(green))), ]), main = "GREEN")
image(t(blue[rev(seq_len(nrow(blue))), ]), main = "BLUE")

# Independent uniform noise in [0, 0.1] for every pixel of every channel.
noise_1 <- matrix(runif(512 * 512, min = 0, max = 0.1), nrow = 512, ncol = 512)
noise_2 <- matrix(runif(512 * 512, min = 0, max = 0.1), nrow = 512, ncol = 512)
noise_3 <- matrix(runif(512 * 512, min = 0, max = 0.1), nrow = 512, ncol = 512)

# Noisy channels before clamping.
red_noise <- red + noise_1
green_noise <- green + noise_2
blue_noise <- blue + noise_3

# Assemble the noisy image; intensities above 1 are capped at 1.
noised_image <- array(0, c(512, 512, 3))
noised_image[, , 1] <- pmin(red_noise, 1)
noised_image[, , 2] <- pmin(green_noise, 1)
noised_image[, , 3] <- pmin(blue_noise, 1)

# Original and noisy image next to each other.
par(mfrow = c(1, 2))
plot(x = c(0, 512), y = c(0, 512), type = "n", main = "original image", xlab = "", ylab = "")
rasterImage(
  rawimage,
  xleft = 0, ybottom = 0, xright = 512, ytop = 512,
  angle = 0, interpolate = TRUE
)
plot(x = c(0, 512), y = c(0, 512), type = "n", main = "noised image", xlab = "", ylab = "")
rasterImage(
  noised_image,
  xleft = 0, ybottom = 0, xright = 512, ytop = 512,
  angle = 0, interpolate = TRUE
)

# Grayscale = sum of the three noisy channels, divided by its maximum so
# that the largest value is 1.
channel_sum <- noised_image[, , 1] + noised_image[, , 2] + noised_image[, , 3]
gray <- channel_sum / max(channel_sum)

par(mfrow = c(1, 1))
plot(x = c(0, 512), y = c(0, 512), type = "n", main = "grayscale image", xlab = "", ylab = "")
rasterImage(
  gray,
  xleft = 0, ybottom = 0, xright = 512, ytop = 512,
  angle = 0, interpolate = TRUE
)

# NOTE(review): this overwrites the noisy image with zeros; nothing in the
# visible script reads `noised_image` afterwards -- confirm it is intended.
noised_image <- array(0, dim = c(512, 512, 3))

# Build the patch matrix: one row per interior pixel of `gray`, nine columns
# holding its 3x3 neighbourhood. Rows follow the pixels row by row (all
# columns of image row 2, then image row 3, ...), i.e. for a 512 x 512 image
# pixel (i, j) lands in row 510 * (i - 2) + (j - 1).
#
# This is done with whole-matrix slices instead of a scalar double loop, and
# without creating a variable called `matrix` that would shadow
# base::matrix().
stopifnot(nrow(gray) >= 3, ncol(gray) >= 3)
interior_rows <- 2:(nrow(gray) - 1)
interior_cols <- 2:(ncol(gray) - 1)

# Neighbour offsets in column order: the column offset varies fastest, so
# the order is upper-left, upper, upper-right, middle-left, ...
offsets <- expand.grid(dj = -1:1, di = -1:1)

dat <- vapply(
  seq_len(nrow(offsets)),
  function(k) {
    shifted <- gray[interior_rows + offsets$di[k], interior_cols + offsets$dj[k]]
    # t() + as.vector() flattens the slice row by row.
    as.vector(t(shifted))
  },
  numeric(length(interior_rows) * length(interior_cols))
)
colnames(dat) <- c("Upper left","upper","upper right","middle left","central","middle right","lower left","lower","lower right")

# Unscaled PCA on the patches.
pc <- princomp(dat)
plot(pc)

summary(pc, loadings = TRUE)
## Importance of components:
##                           Comp.1     Comp.2      Comp.3      Comp.4
## Standard deviation     0.6529131 0.06840902 0.049278038 0.038235138
## Proportion of Variance 0.9694645 0.01064260 0.005522401 0.003324654
## Cumulative Proportion  0.9694645 0.98010708 0.985629478 0.988954132
##                             Comp.5     Comp.6      Comp.7      Comp.8
## Standard deviation     0.033436664 0.03139831 0.030713139 0.030091081
## Proportion of Variance 0.002542536 0.00224199 0.002145209 0.002059192
## Cumulative Proportion  0.991496668 0.99373866 0.995883866 0.997943058
##                             Comp.9
## Standard deviation     0.030074642
## Proportion of Variance 0.002056942
## Cumulative Proportion  1.000000000
## 
## Loadings:
##              Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## Upper left    0.332  0.394  0.407  0.170  0.544  0.295  0.225       
## upper         0.334         0.426 -0.447        -0.564 -0.238  0.358
## upper right   0.332 -0.410  0.391  0.302 -0.445  0.309  0.258  0.213
## middle left   0.333  0.420         0.231               -0.469 -0.282
## central       0.335               -0.502 -0.100         0.455 -0.644
## middle right  0.333 -0.420         0.230               -0.474 -0.387
## lower left    0.332  0.410 -0.390  0.303 -0.443 -0.310  0.256  0.160
## lower         0.334        -0.426 -0.448         0.563 -0.238  0.358
## lower right   0.333 -0.395 -0.407  0.171  0.545 -0.292  0.227  0.142
##              Comp.9
## Upper left    0.314
## upper              
## upper right  -0.270
## middle left  -0.599
## central            
## middle right  0.534
## lower left    0.307
## lower              
## lower right  -0.292
# Get principal component vectors using prcomp instead of princomp.
# The scaling argument of prcomp() is spelled `scale.`; writing `scale`
# only works through partial argument matching, so it is spelled out here.
pc <- prcomp(dat, center = TRUE, scale. = TRUE)

# First 9 principal components
comp <- data.frame(pc$x[, 1:9])

# Results of PCA
ind <- get_pca_ind(pc)

# Eigenvalues
eig.val <- get_eigenvalue(pc)
eig.val
##       eigenvalue variance.percent cumulative.variance.percent
## Dim.1 8.72517938       96.9464375                    96.94644
## Dim.2 0.09578397        1.0642663                    98.01070
## Dim.3 0.04970231        0.5522479                    98.56295
## Dim.4 0.02992167        0.3324630                    98.89541
## Dim.5 0.02288327        0.2542586                    99.14967
## Dim.6 0.02017809        0.2242010                    99.37387
## Dim.7 0.01930660        0.2145178                    99.58839
## Dim.8 0.01853219        0.2059132                    99.79431
## Dim.9 0.01851251        0.2056946                   100.00000
# Eigenvalue plot of the PCA
fviz_eig(pc)

## graph of variables
fviz_pca_var(
  pc,
  repel = TRUE, # avoid text overlapping
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  col.var = "contrib" # colour by contributions to the PC
)

# Results for variables
res.var <- get_pca_var(pc)

# Results for individuals
res.ind <- get_pca_ind(pc)

# Variable contributions to the first dimension
fviz_contrib(pc, axes = 1, choice = "var")

# Variable contributions to the second dimension
fviz_contrib(pc, axes = 2, choice = "var")

# Fold a vector of per-patch PCA scores back into a side x side image.
# The scores are stored row by row (score 510 * (k - 1) + l belongs to
# image position (k, l)), hence byrow = TRUE.
scores_to_image <- function(scores, side = 510) {
  matrix(scores, nrow = side, ncol = side, byrow = TRUE)
}

# Rescale a whole image to [0, 1] with a single global min and max.
# RSNNS::normalizeData(type = "0_1") was used here before, but it
# normalises every COLUMN separately, which stretches each image column
# independently and distorts the picture.
rescale_to_unit <- function(img) {
  rng <- range(img)
  span <- rng[2] - rng[1]
  if (span == 0) {
    # A constant image has no contrast to stretch; show it as all zeros.
    return(img * 0)
  }
  (img - rng[1]) / span
}

# Draw one score image on an empty 510 x 510 canvas.
draw_pc_image <- function(img, title) {
  plot(c(0, 510), c(0, 510), main = title, type = "n", xlab = "", ylab = "")
  rasterImage(img, 0, 0, 510, 510, angle = 0, interpolate = TRUE)
}

# Scores of the first three principal components as images.
soul_image <- pc$x[, 1]
new <- scores_to_image(soul_image)
new_image <- rescale_to_unit(new)
draw_pc_image(new_image, "PC1 Image")

soul_image2 <- pc$x[, 2]
new2 <- scores_to_image(soul_image2)
new_image2 <- rescale_to_unit(new2)
draw_pc_image(new_image2, "PC2 Image")

soul_image3 <- pc$x[, 3]
new3 <- scores_to_image(soul_image3)
new_image3 <- rescale_to_unit(new3)
draw_pc_image(new_image3, "PC3 Image")

# The three score images side by side.
par(mfrow = c(1, 3))
draw_pc_image(new_image, "PC1 Image")
draw_pc_image(new_image2, "PC2 Image")
draw_pc_image(new_image3, "PC3 Image")